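/* Prepares the imported department-level federal funding file
   for merging with the rest of the regression data.

   Produces county-level funding totals for fiscal years 1976-1980, once for
   the counties that contain a university and once for their surrounding counties.
*/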


* Start from an empty dataset, close any log left open by an earlier run
* (capture suppresses the error when no log is open), and turn off the
* screen-by-screen pause in output.
clear
capture log close
set more off

* The log file name carries no directory, so it is written to the current
* working directory; the logs global defined below is not used for it.
log using "prep_fedfunding_mylog.log", replace


* Directory globals. Later commands append a file name directly to these
* macros, so each value must end with a slash.
* NOTE(review): path and imported are redacted in this copy - confirm that the
* real values also end with a slash.
* Only imported and inter are referenced in the visible part of this file.
global path "[redacted]"
global programs "${path}programs/"
global logs "${path}logs/"
global rawlbd "/projects/data/lbd/"
global imported "[redacted]"
global inter "${path}data/inter/"
global output "${path}data/output/"
global tables "${path}results/tables/"
global graphs "${path}results/graphs/"

*************************
*** PREP COUNTY DISTS ***
*************************


* Build the containing-county / surrounding-county pair files from the
* imported county distance data. Two files are written: one row per pair
* (long) and one row per containing county (wide).
* File names are quoted so that a directory containing spaces still works.
use "${imported}county_dists.dta", clear

* Store both FIPS codes as strings and pad four-character codes with a
* leading zero so that they become five characters long.
tostring contstcntyfips surstcntyfips, replace
foreach var of varlist contstcntyfips surstcntyfips {
   replace `var' = "0" + `var' if length(`var') == 4
}

* Keep one row per distinct containing/surrounding pair.
contract contstcntyfips surstcntyfips
drop _freq

save "${inter}cont_sur_counties.dta", replace

* Number the surrounding counties within each containing county.
* bysort states the required sort order explicitly instead of relying on the
* order that contract happened to leave behind, and ordering by the
* surrounding code within each group makes the numbering reproducible.
bysort contstcntyfips (surstcntyfips): gen surnum = _n
reshape wide surstcntyfips, i(contstcntyfips) j(surnum)

save "${inter}cont_sur_counties_wide.dta", replace


* Build a lookup, keyed on fice, of the university-system identifiers
* (univsysname, univsysnum, dupid) found in the county distance file.
* File names are quoted so that a directory containing spaces still works.
use "${imported}county_dists.dta", clear
drop if dupid==0
contract dupid fice univsysname univsysnum
drop _freq

* Four institutions make (univsysname univsysnum dupid) non-unique, which
* reshape does not allow for its i() variables. They are set aside in their
* own file before the reshape and appended back afterwards.
* Individual university institution codes are redacted for disclosure.
* The condition is defined once so that the keep and the drop below can never
* get out of step. Local macros require this file to be run as a whole.
local fourfices "fice==xx | fice==xx | fice==xx | fice==xx"

preserve
   keep if `fourfices'
   keep fice univsysname univsysnum dupid
   gen dropnum = 0
   save "${inter}fourfices.dta", replace
restore

drop if `fourfices'
rename fice fice0
reshape long fice, i(univsysname univsysnum dupid) j(dropnum)
append using "${inter}fourfices.dta"

* Flag rows whose dropnum is 0; all other rows keep a missing flag.
gen notdroppedfrdist = 1 if dropnum==0
drop if missing(fice)

* The funding data is merged m:1 on fice later in this file, which needs fice
* to identify rows uniquely here. Check that before the file is written.
isid fice
sort fice

save "${inter}locunivdupes.dta", replace

*************************
*** END COUNTY DISTS ***
*************************


************************
*** FED FUNDING DATA ***
************************

* Load the campus-level funding file and attach the university-system lookup.
* Lookup rows with no funding record are discarded; funding rows with no
* lookup match are retained.
use ${imported}fedfunding_campuslevel_deptwide.dta, clear

sort fice
merge m:1 fice using ${inter}locunivdupes.dta, keep(master match)
drop _merge

* Restrict to identifiers, fiscal year and the agency support totals.
keep univsysname univsysnum fice city state dupid notdroppedfrdist fy ///
     totsuppDOD nonfactotsuppDOD totsuppNASA nonfactotsuppNASA ///
     totsuppNIH nonfactotsuppNIH alldepttotsupp nonfacalldepttotsupp

* Use the names that the place-to-FIPS crosswalk is merged on afterwards.
rename city place
rename state statebest

* Fiscal years 1976 through 1980 only.
keep if inrange(fy, 1976, 1980)

* One row per institution: the sum over those years and the mean per year,
* for all-department, DOD and NIH support.
collapse (sum)  fvyrsumTOT=alldepttotsupp fvyrsumDOD=totsuppDOD fvyrsumNIH=totsuppNIH ///
         (mean) avgannTOT=alldepttotsupp avgannDOD=totsuppDOD avgannNIH=totsuppNIH, ///
         by(fice univsysname univsysnum place statebest)

* Attach county identifiers from the place-to-FIPS crosswalk and keep only
* institutions whose place and state were found in it.
* The merge indicator is written out in full as _merge: the short form _m
* works only while variable-name abbreviation is switched on.
* File names are quoted so that a directory containing spaces still works.
merge m:1 place statebest using "${imported}place_fips_xwalk.dta"
keep if _merge==3
drop _merge

drop popcounty placestate state_fips county_fips place statebest fice

* Save the university-system to county crosswalk. contract adds its _freq
* count variable, which is kept in the saved file.
preserve
   contract univsysname univsysnum fips county
   save "${inter}unisys_cnty_crosswalk.dta", replace
restore

* Sum funding across universities within a county.
collapse (sum) fvyrsumTOT fvyrsumDOD fvyrsumNIH avgannTOT avgannDOD avgannNIH, by(fips)


*** Save the small data set in which federal funding affects only the
*** containing counties.
save "${inter}fedfunding_contcnty.dta", replace
***

* Merge onto the containing/surrounding county pairs so that each county's
* funding is also credited to every county paired with it, then total the
* funding received by each surrounding county.
* NOTE(review): contstcntyfips is a string in the pair file, so fips must be a
* string here as well or the merge stops with a type mismatch - confirm.
* NOTE(review): a containing county keeps its own funding in the result only
* if the pair file lists it as one of its own surrounding counties - confirm.
* The merge indicator is written out in full as _merge: the short form _m
* works only while variable-name abbreviation is switched on.

rename fips contstcntyfips
merge 1:m contstcntyfips using "${inter}cont_sur_counties.dta"
keep if _merge==3
drop _merge

collapse (sum) fvyrsumTOT fvyrsumDOD fvyrsumNIH avgannTOT avgannDOD avgannNIH, by(surstcntyfips)

* Use the same key name as the containing-county file.
rename surstcntyfips fips

save "${inter}fedfunding_surcnty.dta", replace


* Clean up: delete the intermediate lookup file, which is no longer needed.
erase ${inter}locunivdupes.dta


* Stop logging.
log close









